%load_ext autoreload
%autoreload 2
import os
MOMAPS_HOME = '/home/labs/hornsteinlab/Collaboration/MOmaps_Noam/MOmaps'
MOMAPS_DATA_HOME = '/home/labs/hornsteinlab/Collaboration/MOmaps'
LOGS_PATH = os.path.join(MOMAPS_DATA_HOME, "outputs/preprocessing/spd/logs/dNLS")
PLOT_PATH = os.path.join(MOMAPS_HOME, 'src', 'preprocessing', 'notebooks','figures','Neurons')
os.chdir(MOMAPS_HOME)
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
plt.rcParams["image.cmap"] = "Set1"
from src.common.lib.preprocessing_utils import rescale_intensity
from src.common.lib.images_qc import *
import contextlib
import io
import matplotlib
import warnings
warnings.filterwarnings('ignore', category=pd.core.common.SettingWithCopyWarning)
from src.common.lib.qc_config_tmp import *
from src.common.lib.image_sampling_utils import *
from matplotlib.colors import LinearSegmentedColormap
df = log_files_qc(LOGS_PATH)
df_dapi = df[df.marker=='DAPI']
df_target = df[df.marker!='DAPI']
reading logs of batch2 reading logs of batch4 reading logs of batch3 reading logs of batch5 Total of 4 files were read. Before dup handeling (100193, 22) After duplication removal #1: (100193, 23) After duplication removal #2: (100193, 23)
# choose batches
batches = [f'batch{i}' for i in range(2,6)]
batches
['batch2', 'batch3', 'batch4', 'batch5']
root_directory_raw = os.path.join(MOMAPS_DATA_HOME, 'input', 'images', 'raw', 'SpinningDisk','deltaNLS_sort')
batches_raw = [batch.replace("_16bit_no_downsample","") for batch in batches]
raws = run_validate_folder_structure(root_directory_raw, False, dnls_panels, dnls_markers.copy(),PLOT_PATH, dnls_marker_info,
dnls_cell_lines_to_cond, reps, dnls_cell_lines_for_disp, dnls_expected_dapi_raw,
batches=batches_raw, fig_width=5)
batch2 Folder structure is valid. No bad files are found. Total Sites: 27000
======== batch3 Folder structure is invalid. Missing paths: /home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/deltaNLS_sort/batch3/WT/panelN /home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/deltaNLS_sort/batch3/TDP43/panelN No bad files are found. Total Sites: 25800
======== batch4 Folder structure is invalid. Missing paths: /home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/deltaNLS_sort/batch4/WT/panelN /home/labs/hornsteinlab/Collaboration/MOmaps/input/images/raw/SpinningDisk/deltaNLS_sort/batch4/TDP43/panelN No bad files are found. Total Sites: 25500
======== batch5 Folder structure is valid. No bad files are found. Total Sites: 26897
======== ====================
root_directory_proc = os.path.join(MOMAPS_DATA_HOME, 'input', 'images', 'processed', 'spd2',
'SpinningDisk','deltaNLS')
procs = run_validate_folder_structure(root_directory_proc, True, dnls_panels, dnls_markers,PLOT_PATH,dnls_marker_info,
dnls_cell_lines_to_cond, reps, dnls_cell_lines_for_disp, dnls_expected_dapi_raw,
batches=batches, fig_width=5)
batch2 Folder structure is valid. No bad files are found. Total Sites: 25586
======== batch3 Folder structure is invalid. Missing paths: /home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/deltaNLS/batch3/WT/Untreated/TDP43N /home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/deltaNLS/batch3/TDP43/dox/TDP43N /home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/deltaNLS/batch3/TDP43/Untreated/TDP43N No bad files are found. Total Sites: 24336
======== batch4 Folder structure is invalid. Missing paths: /home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/deltaNLS/batch4/WT/Untreated/TDP43N /home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/deltaNLS/batch4/TDP43/dox/TDP43N /home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/deltaNLS/batch4/TDP43/Untreated/TDP43N No bad files are found. Total Sites: 23638
======== batch5 Folder structure is valid. No bad files are found. Total Sites: 25527
======== ====================
display_diff(batches, raws, procs, PLOT_PATH, fig_width=5)
batch2
======== batch3
======== batch4
======== batch5
========
#for batch in list(range(3,9)) + ['7_16bit','8_16bit','9_16bit']:
for batch in batches:
with contextlib.redirect_stdout(io.StringIO()):
var = sample_and_calc_variance(root_directory_proc, batch,
sample_size_per_markers=200, cond_count=2, rep_count=len(reps),
num_markers=len(dnls_markers))
print(f'{batch} var: ',var)
batch2 var: 0.011521469031638067 batch3 var: 0.010357440534791738 batch4 var: 0.010663615289945838 batch5 var: 0.01020901013294272
By order of filtering
Percentage out of the total sites
dapi_filter_by_brenner = show_site_survival_dapi_brenner(df_dapi,batches, dnls_line_colors, dnls_panels)
Percentage out of the sites that passed the previous filter. In parenthesis are absolute values.
A site will be filtered out if Cellpose found 0 cells in it.
dapi_filter_by_cellpose = show_site_survival_dapi_cellpose(df_dapi, batches, dapi_filter_by_brenner, dnls_line_colors, dnls_panels)
Percentage out of the sites that passed the previous filter. In parenthesis are absolute values.
A site will be filtered out if after tiling, no tile is containing at least 85% of a cell that Cellpose detected.
dapi_filter_by_tiling=show_site_survival_dapi_tiling(df_dapi, batches, dapi_filter_by_cellpose, dnls_line_colors, dnls_panels)
Percentage out of the sites that passed the previous filter. In parenthesis are absolute values (if different than the percentages).
show_site_survival_target_brenner(df_dapi, df_target, dapi_filter_by_tiling)
names = ['Total number of tiles', 'Total number of whole cells']
stats = ['n_valid_tiles','site_whole_cells_counts_sum','site_cell_count']
total_sum = calc_total_sums(df_target, df_dapi, stats)
for stat, name in zip(stats[:2], names):
to_heatmap = total_sum.rename(columns={stat:'index'})
plot_filtering_heatmap(to_heatmap, extra_index='marker', vmin=None, vmax=None,
xlabel = name, show_sum=True, figsize=(4,8))
show_total_sum_tables(total_sum)
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch2 | ||||
| count | 156.000000 | 156.000000 | 156.000000 | 156.000000 |
| mean | 487.147436 | 4.871474 | 536.814103 | 1409.724359 |
| std | 152.615680 | 1.526157 | 174.064312 | 557.999637 |
| min | 211.000000 | 2.110000 | 224.000000 | 433.000000 |
| 25% | 369.000000 | 3.690000 | 398.000000 | 873.500000 |
| 50% | 456.500000 | 4.565000 | 496.000000 | 1410.000000 |
| 75% | 613.250000 | 6.132500 | 687.500000 | 1871.750000 |
| max | 831.000000 | 8.310000 | 919.000000 | 2525.000000 |
| sum | 75995.000000 | NaN | 83743.000000 | 219917.000000 |
| expected_count | 450.000000 | 450.000000 | 450.000000 | 450.000000 |
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch3 | ||||
| count | 150.000000 | 150.000000 | 150.000000 | 150.000000 |
| mean | 579.380000 | 5.793800 | 647.213333 | 1618.353333 |
| std | 211.183392 | 2.111834 | 245.000632 | 718.404077 |
| min | 123.000000 | 1.230000 | 131.000000 | 285.000000 |
| 25% | 332.500000 | 3.325000 | 358.000000 | 746.750000 |
| 50% | 673.000000 | 6.730000 | 756.500000 | 1941.000000 |
| 75% | 759.500000 | 7.595000 | 849.500000 | 2233.250000 |
| max | 891.000000 | 8.910000 | 1006.000000 | 2647.000000 |
| sum | 86907.000000 | NaN | 97082.000000 | 242753.000000 |
| expected_count | 450.000000 | 450.000000 | 450.000000 | 450.000000 |
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch4 | ||||
| count | 149.000000 | 149.000000 | 149.000000 | 149.000000 |
| mean | 528.865772 | 5.288658 | 586.469799 | 1501.711409 |
| std | 197.328908 | 1.973289 | 225.492451 | 669.566533 |
| min | 40.000000 | 0.400000 | 42.000000 | 81.000000 |
| 25% | 350.000000 | 3.500000 | 398.000000 | 808.000000 |
| 50% | 566.000000 | 5.660000 | 629.000000 | 1709.000000 |
| 75% | 696.000000 | 6.960000 | 781.000000 | 2098.000000 |
| max | 887.000000 | 8.870000 | 993.000000 | 2613.000000 |
| sum | 78801.000000 | NaN | 87384.000000 | 223755.000000 |
| expected_count | 450.000000 | 450.000000 | 450.000000 | 450.000000 |
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch5 | ||||
| count | 156.000000 | 156.000000 | 156.000000 | 156.000000 |
| mean | 554.634615 | 5.546346 | 619.596154 | 1550.474359 |
| std | 190.310852 | 1.903109 | 220.682834 | 659.156092 |
| min | 105.000000 | 1.050000 | 114.000000 | 249.000000 |
| 25% | 368.250000 | 3.682500 | 396.000000 | 821.250000 |
| 50% | 593.000000 | 5.930000 | 683.000000 | 1718.000000 |
| 75% | 734.000000 | 7.340000 | 830.000000 | 2144.000000 |
| max | 839.000000 | 8.390000 | 955.000000 | 2503.000000 |
| sum | 86523.000000 | NaN | 96657.000000 | 241874.000000 |
| expected_count | 450.000000 | 450.000000 | 450.000000 | 450.000000 |
| n valid tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| All batches | ||||
| count | 611.000000 | 611.000000 | 611.000000 | 611.000000 |
| mean | 537.194763 | 5.371948 | 597.162029 | 1519.310966 |
| std | 191.403650 | 1.914036 | 220.853208 | 655.868995 |
| min | 40.000000 | 0.400000 | 42.000000 | 81.000000 |
| 25% | 358.000000 | 3.580000 | 386.500000 | 807.500000 |
| 50% | 563.000000 | 5.630000 | 628.000000 | 1693.000000 |
| 75% | 705.000000 | 7.050000 | 795.500000 | 2098.000000 |
| max | 891.000000 | 8.910000 | 1006.000000 | 2647.000000 |
| sum | 328226.000000 | NaN | 364866.000000 | 928299.000000 |
| expected_count | 450.000000 | 450.000000 | 450.000000 | 450.000000 |
df_no_empty_sites = df_dapi[df_dapi.n_valid_tiles !=0]
plot_cell_count(df_no_empty_sites, dnls_lines_order, dnls_custom_palette, y='site_cell_count_sum',
title='Cell Count Average per Site (from tiles)')
plot_cell_count(df_no_empty_sites, dnls_lines_order, dnls_custom_palette, y='site_whole_cells_counts_sum',
title='Whole Cell Count Average per Site')
plot_cell_count(df_no_empty_sites, dnls_lines_order, dnls_custom_palette, y='site_cell_count',
title='Cellpose Cell Count Average per Site')
plot_catplot(df_dapi, custom_palette,reps, x='n_valid_tiles', x_title='valid tiles count', batch_min=2, batch_max=5)
plot_hm(df_dapi, split_by='rep', rows='cell_line_cond', columns='panel')
for batch in batches:
print(batch)
#batch_num = batch.replace('batch',"")
run_calc_hist_new(f'deltaNLS_sort/{batch}', dnls_cell_lines_for_disp, dnls_markers,
hist_sample=10,sample_size_per_markers=200, ncols=8, nrows=4, dnls=True)
print("="*30)
batch2
/home/labs/hornsteinlab/Collaboration/MOmaps_Noam/MOmaps/src/common/lib/images_qc.py:914: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect. plt.tight_layout()
/home/labs/hornsteinlab/Collaboration/MOmaps/anaconda3/momaps_37/lib/python3.7/site-packages/IPython/core/pylabtools.py:151: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect. fig.canvas.print_figure(bytes_io, **kw)
============================== batch3
============================== batch4
============================== batch5
==============================
# save notebook as HTML ( the HTML will be saved in the same folder the original script is)
from IPython.display import display, Javascript
display(Javascript('IPython.notebook.save_checkpoint();'))
os.system('jupyter nbconvert --to html src/preprocessing/notebooks/no_ds/qc_report_newPP_dNLS.ipynb')
[NbConvertApp] Converting notebook src/preprocessing/notebooks/no_ds/qc_report_newPP_dNLS.ipynb to html [NbConvertApp] Writing 16690053 bytes to src/preprocessing/notebooks/no_ds/qc_report_newPP_dNLS.html
0